

<!DOCTYPE html>
<html class="writer-html5" lang="zh" >
<head>
  <meta charset="utf-8">
  
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  
  <title>宽爬行 &mdash; Scrapy 2.3.0 文档</title>
  

  
  <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
  <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
  <link rel="stylesheet" href="../_static/css/tooltipster.custom.css" type="text/css" />
  <link rel="stylesheet" href="../_static/css/tooltipster.bundle.min.css" type="text/css" />
  <link rel="stylesheet" href="../_static/css/tooltipster-sideTip-shadow.min.css" type="text/css" />
  <link rel="stylesheet" href="../_static/css/tooltipster-sideTip-punk.min.css" type="text/css" />
  <link rel="stylesheet" href="../_static/css/tooltipster-sideTip-noir.min.css" type="text/css" />
  <link rel="stylesheet" href="../_static/css/tooltipster-sideTip-light.min.css" type="text/css" />
  <link rel="stylesheet" href="../_static/css/tooltipster-sideTip-borderless.min.css" type="text/css" />
  <link rel="stylesheet" href="../_static/css/micromodal.css" type="text/css" />
  <link rel="stylesheet" href="../_static/css/sphinx_rtd_theme.css" type="text/css" />

  
  
  
  

  
  <!--[if lt IE 9]>
    <script src="../_static/js/html5shiv.min.js"></script>
  <![endif]-->
  
    
      <script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
        <script src="../_static/jquery.js"></script>
        <script src="../_static/underscore.js"></script>
        <script src="../_static/doctools.js"></script>
        <script src="../_static/language_data.js"></script>
        <script src="../_static/js/hoverxref.js"></script>
        <script src="../_static/js/tooltipster.bundle.min.js"></script>
        <script src="../_static/js/micromodal.min.js"></script>
    
    <script type="text/javascript" src="../_static/js/theme.js"></script>

    
    <link rel="index" title="索引" href="../genindex.html" />
    <link rel="search" title="搜索" href="../search.html" />
    <link rel="next" title="使用浏览器的开发人员工具进行抓取" href="developer-tools.html" />
    <link rel="prev" title="常用做法" href="practices.html" /> 
</head>

<body class="wy-body-for-nav">

   
  <div class="wy-grid-for-nav">
    
    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
      <div class="wy-side-scroll">
        <div class="wy-side-nav-search" >
          

          
            <!-- Sidebar home link. `alt` is not an attribute of <a> (it belongs to img/area/input), so the hint is carried by `title`; the visible text "Scrapy" remains the link's accessible name. -->
            <a href="../index.html" class="icon icon-home" title="Documentation Home"> Scrapy
          </a>

          
            
            
              <div class="version">
                2.3
              </div>
            
          

          
<!-- Sidebar documentation search: sends the query as `q` (plus two fixed hidden fields) via GET to ../search.html. -->
<div role="search">
  <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
    <!-- A placeholder is not an accessible name, so the field also carries an aria-label. -->
    <input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>

          
        </div>

        
        <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
          
            
            
              
            
            
              <p class="caption"><span class="caption-text">第一步</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../intro/overview.html">Scrapy一目了然</a></li>
<li class="toctree-l1"><a class="reference internal" href="../intro/install.html">安装指南</a></li>
<li class="toctree-l1"><a class="reference internal" href="../intro/tutorial.html">Scrapy 教程</a></li>
<li class="toctree-l1"><a class="reference internal" href="../intro/examples.html">实例</a></li>
</ul>
<p class="caption"><span class="caption-text">基本概念</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="commands.html">命令行工具</a></li>
<li class="toctree-l1"><a class="reference internal" href="spiders.html">蜘蛛</a></li>
<li class="toctree-l1"><a class="reference internal" href="selectors.html">选择器</a></li>
<li class="toctree-l1"><a class="reference internal" href="items.html">项目</a></li>
<li class="toctree-l1"><a class="reference internal" href="loaders.html">项目加载器</a></li>
<li class="toctree-l1"><a class="reference internal" href="shell.html">Scrapy shell</a></li>
<li class="toctree-l1"><a class="reference internal" href="item-pipeline.html">项目管道</a></li>
<li class="toctree-l1"><a class="reference internal" href="feed-exports.html">Feed 导出</a></li>
<li class="toctree-l1"><a class="reference internal" href="request-response.html">请求和响应</a></li>
<li class="toctree-l1"><a class="reference internal" href="link-extractors.html">链接提取器</a></li>
<li class="toctree-l1"><a class="reference internal" href="settings.html">设置</a></li>
<li class="toctree-l1"><a class="reference internal" href="exceptions.html">异常</a></li>
</ul>
<p class="caption"><span class="caption-text">内置服务</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="logging.html">日志</a></li>
<li class="toctree-l1"><a class="reference internal" href="stats.html">统计数据集合</a></li>
<li class="toctree-l1"><a class="reference internal" href="email.html">发送电子邮件</a></li>
<li class="toctree-l1"><a class="reference internal" href="telnetconsole.html">远程登录控制台</a></li>
<li class="toctree-l1"><a class="reference internal" href="webservice.html">Web服务</a></li>
</ul>
<p class="caption"><span class="caption-text">解决具体问题</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../faq.html">常见问题</a></li>
<li class="toctree-l1"><a class="reference internal" href="debug.html">调试spiders</a></li>
<li class="toctree-l1"><a class="reference internal" href="contracts.html">蜘蛛合约</a></li>
<li class="toctree-l1"><a class="reference internal" href="practices.html">常用做法</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">宽爬行</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#use-the-right-scheduler-priority-queue">使用正确的 <code class="xref std std-setting docutils literal notranslate"><span class="pre">SCHEDULER_PRIORITY_QUEUE</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#increase-concurrency">增加并发性</a></li>
<li class="toctree-l2"><a class="reference internal" href="#increase-twisted-io-thread-pool-maximum-size">增加Twisted IO线程池的最大大小</a></li>
<li class="toctree-l2"><a class="reference internal" href="#setup-your-own-dns">设置您自己的DNS</a></li>
<li class="toctree-l2"><a class="reference internal" href="#reduce-log-level">降低日志级别</a></li>
<li class="toctree-l2"><a class="reference internal" href="#disable-cookies">禁用Cookie</a></li>
<li class="toctree-l2"><a class="reference internal" href="#disable-retries">禁用重试</a></li>
<li class="toctree-l2"><a class="reference internal" href="#reduce-download-timeout">减少下载超时</a></li>
<li class="toctree-l2"><a class="reference internal" href="#disable-redirects">禁用重定向</a></li>
<li class="toctree-l2"><a class="reference internal" href="#enable-crawling-of-ajax-crawlable-pages">启用“Ajax可爬行页”的爬行</a></li>
<li class="toctree-l2"><a class="reference internal" href="#crawl-in-bfo-order">按BFO顺序爬行</a></li>
<li class="toctree-l2"><a class="reference internal" href="#be-mindful-of-memory-leaks">注意内存泄漏</a></li>
<li class="toctree-l2"><a class="reference internal" href="#install-a-specific-twisted-reactor">安装特定的Twisted反应器</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="developer-tools.html">使用浏览器的开发人员工具进行抓取</a></li>
<li class="toctree-l1"><a class="reference internal" href="dynamic-content.html">选择动态加载的内容</a></li>
<li class="toctree-l1"><a class="reference internal" href="leaks.html">调试内存泄漏</a></li>
<li class="toctree-l1"><a class="reference internal" href="media-pipeline.html">下载和处理文件和图像</a></li>
<li class="toctree-l1"><a class="reference internal" href="deploy.html">部署蜘蛛</a></li>
<li class="toctree-l1"><a class="reference internal" href="autothrottle.html">AutoThrottle 扩展</a></li>
<li class="toctree-l1"><a class="reference internal" href="benchmarking.html">基准测试</a></li>
<li class="toctree-l1"><a class="reference internal" href="jobs.html">作业：暂停和恢复爬行</a></li>
<li class="toctree-l1"><a class="reference internal" href="coroutines.html">协同程序</a></li>
<li class="toctree-l1"><a class="reference internal" href="asyncio.html">asyncio</a></li>
</ul>
<p class="caption"><span class="caption-text">扩展Scrapy</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="architecture.html">体系结构概述</a></li>
<li class="toctree-l1"><a class="reference internal" href="downloader-middleware.html">下载器中间件</a></li>
<li class="toctree-l1"><a class="reference internal" href="spider-middleware.html">蜘蛛中间件</a></li>
<li class="toctree-l1"><a class="reference internal" href="extensions.html">扩展</a></li>
<li class="toctree-l1"><a class="reference internal" href="api.html">核心API</a></li>
<li class="toctree-l1"><a class="reference internal" href="signals.html">信号</a></li>
<li class="toctree-l1"><a class="reference internal" href="exporters.html">条目导出器</a></li>
</ul>
<p class="caption"><span class="caption-text">其余所有</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../news.html">发行说明</a></li>
<li class="toctree-l1"><a class="reference internal" href="../contributing.html">为 Scrapy 贡献</a></li>
<li class="toctree-l1"><a class="reference internal" href="../versioning.html">版本控制和API稳定性</a></li>
</ul>

            
          
        </div>
        
      </div>
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">

      
      <nav class="wy-nav-top" aria-label="top navigation">
        
          <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
          <a href="../index.html">Scrapy</a>
        
      </nav>


      <div class="wy-nav-content">
        
        <div class="rst-content">
        
          















<div role="navigation" aria-label="breadcrumbs navigation">

  <ul class="wy-breadcrumbs">
    
      <!-- Breadcrumb root: the link is icon-only (no text content), so aria-label supplies its accessible name. -->
      <li><a href="../index.html" class="icon icon-home" aria-label="首页"></a> &raquo;</li>
        
      <li>宽爬行</li>
    
    
      <li class="wy-breadcrumbs-aside">
        
            
        
      </li>
    
  </ul>

  
  <hr/>
</div>
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">
            
  <div class="section" id="broad-crawls">
<span id="topics-broad-crawls"></span><h1>宽爬行<a class="headerlink" href="#broad-crawls" title="永久链接至标题">¶</a></h1>
<p>Scrapy 的默认设置针对爬行特定站点进行了优化。这些站点通常由单个 Scrapy 蜘蛛来处理，尽管这并不是必需的（例如，有一些通用蜘蛛可以处理交给它们的任何站点）。</p>
<p>除了这种“集中的爬行”之外，还有另一种常见的爬行类型，它覆盖了大量（可能是无限的）域，并且只受时间或其他任意约束的限制，而不是在域被爬行到完成或没有更多的请求执行时停止。这些被称为“宽爬虫”，是搜索引擎使用的典型爬虫。</p><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js"></script>
<!-- In-article ad slot: the <ins class="adsbygoogle"> placeholder below carries the ad
     client and slot ids, and the inline script appends one empty request object to the
     window.adsbygoogle queue (creating the queue first if it does not exist yet). -->
<ins class="adsbygoogle"
     style="display:block; text-align:center;"
     data-ad-layout="in-article"
     data-ad-format="fluid"
     data-ad-client="ca-pub-1466963416408457"
     data-ad-slot="8850786025"></ins>
<script>
     (adsbygoogle = window.adsbygoogle || []).push({});
</script>
<p>这些是一些常见的特性，通常在广泛的爬行中发现：</p>
<ul class="simple">
<li><p>它们对许多域（通常是无边界的）而不是一组特定的站点进行爬网</p></li>
<li><p>它们不一定要对域进行爬网直至完成，因为这样做是不切实际的（或不可能的），而是按已爬网的时间或页数限制爬网。</p></li>
<li><p>它们在逻辑上更简单（而不是具有许多提取规则的非常复杂的spider），因为数据通常在单独的阶段进行后处理。</p></li>
<li><p>它们同时对多个域进行爬网，这使得它们可以通过不受任何特定站点约束的限制来实现更快的爬网速度（每个站点的爬行速度都很慢，以尊重礼貌，但许多站点是并行进行的）</p></li>
</ul>
<p>如上所述，Scrapy的默认设置是针对重点爬行而不是广泛爬行而优化的。然而，由于其异步体系结构，Scrapy非常适合执行快速的广泛爬行。本页总结了在使用Scrapy进行宽范围爬行时需要记住的一些事情，以及为实现高效的宽范围爬行而调整Scrapy设置的具体建议。</p>
<div class="section" id="use-the-right-scheduler-priority-queue">
<span id="broad-crawls-scheduler-priority-queue"></span><h2>使用正确的 <a class="reference internal" href="settings.html#std-setting-SCHEDULER_PRIORITY_QUEUE"><code class="xref std std-setting docutils literal notranslate"><span class="pre">SCHEDULER_PRIORITY_QUEUE</span></code></a><a class="headerlink" href="#use-the-right-scheduler-priority-queue" title="永久链接至标题">¶</a></h2>
<p>Scrapy的默认调度程序优先级队列为 <code class="docutils literal notranslate"><span class="pre">'scrapy.pqueues.ScrapyPriorityQueue'</span></code> 。它在单域爬行时工作得最好，但在并行爬行许多不同的域时效果不佳。</p>
<p>要应用建议的优先级队列，请使用：</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">SCHEDULER_PRIORITY_QUEUE</span> <span class="o">=</span> <span class="s1">&#39;scrapy.pqueues.DownloaderAwarePriorityQueue&#39;</span>
</pre></div>
</div>
</div>
<div class="section" id="increase-concurrency">
<span id="broad-crawls-concurrency"></span><h2>增加并发性<a class="headerlink" href="#increase-concurrency" title="永久链接至标题">¶</a></h2>
<p>Concurrency是并行处理的请求数。存在全局限制 (<a class="reference internal" href="settings.html#std-setting-CONCURRENT_REQUESTS"><code class="xref std std-setting docutils literal notranslate"><span class="pre">CONCURRENT_REQUESTS</span></code></a> ）以及每个域都可以设置的附加限制 (<a class="reference internal" href="settings.html#std-setting-CONCURRENT_REQUESTS_PER_DOMAIN"><code class="xref std std-setting docutils literal notranslate"><span class="pre">CONCURRENT_REQUESTS_PER_DOMAIN</span></code></a> ）或每IP (<a class="reference internal" href="settings.html#std-setting-CONCURRENT_REQUESTS_PER_IP"><code class="xref std std-setting docutils literal notranslate"><span class="pre">CONCURRENT_REQUESTS_PER_IP</span></code></a> ）</p>
<div class="admonition note">
<p class="admonition-title">注解</p>
<p><a class="reference internal" href="#broad-crawls-scheduler-priority-queue"><span class="std std-ref">推荐用于宽爬行</span></a> 的调度程序优先级队列不支持 <a class="reference internal" href="settings.html#std-setting-CONCURRENT_REQUESTS_PER_IP"><code class="xref std std-setting docutils literal notranslate"><span class="pre">CONCURRENT_REQUESTS_PER_IP</span></code></a> 。</p>
</div>
<p>scrapy中的默认全局并发限制不适用于并行地对许多不同的域进行爬网，因此您需要增加它。增加多少将取决于您的爬虫有多少CPU和内存可用。</p>
<p>一个好的起点是 <code class="docutils literal notranslate"><span class="pre">100</span></code> ：</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">CONCURRENT_REQUESTS</span> <span class="o">=</span> <span class="mi">100</span>
</pre></div>
</div>
<p>但最好的方法是做一些试验，找出您的Scrapy进程在多大的并发量下开始受CPU限制。为了获得最佳性能，您应该选择使CPU使用率处于80-90%的并发量。</p>
<p>增加并发性也会增加内存使用量。如果担心内存使用问题，您可能需要相应地降低全局并发限制。</p>
</div>
<div class="section" id="increase-twisted-io-thread-pool-maximum-size">
<h2>增加Twisted IO线程池的最大大小<a class="headerlink" href="#increase-twisted-io-thread-pool-maximum-size" title="永久链接至标题">¶</a></h2>
<p>目前Scrapy使用线程池以阻塞方式进行DNS解析。在并发级别较高时，爬行可能会变慢，甚至因DNS解析程序超时而失败。一种可能的解决方案是增加处理DNS查询的线程数，这样DNS队列会被更快地处理，从而加快连接的建立和整体的爬行速度。</p>
<p>要增加最大线程池大小，请使用：</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">REACTOR_THREADPOOL_MAXSIZE</span> <span class="o">=</span> <span class="mi">20</span>
</pre></div>
</div>
</div>
<div class="section" id="setup-your-own-dns">
<h2>设置您自己的DNS<a class="headerlink" href="#setup-your-own-dns" title="永久链接至标题">¶</a></h2>
<p>如果您有多个爬行进程和单个中心DNS，它们对DNS服务器的作用可能类似于DoS攻击，导致整个网络速度减慢，甚至阻塞您的机器。要避免这种情况，请搭建带本地缓存的自有DNS服务器，并将其上游指向某个大型DNS服务（如OpenDNS或Verizon）。</p>
</div>
<div class="section" id="reduce-log-level">
<h2>降低日志级别<a class="headerlink" href="#reduce-log-level" title="永久链接至标题">¶</a></h2>
<p>当进行广泛的爬行时，你通常只对得到的爬行速率和发现的任何错误感兴趣。使用 <code class="docutils literal notranslate"><span class="pre">INFO</span></code> 日志级别时，Scrapy会报告这些统计信息。为了节省CPU（和日志存储空间），在生产环境中进行大型宽爬行时不应使用 <code class="docutils literal notranslate"><span class="pre">DEBUG</span></code> 日志级别。不过，在开发（宽）爬虫时使用 <code class="docutils literal notranslate"><span class="pre">DEBUG</span></code> 级别可能没有问题。</p>
<p>要设置日志级别，请使用：</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">LOG_LEVEL</span> <span class="o">=</span> <span class="s1">&#39;INFO&#39;</span>
</pre></div>
</div>
</div>
<div class="section" id="disable-cookies">
<h2>禁用Cookie<a class="headerlink" href="#disable-cookies" title="永久链接至标题">¶</a></h2>
<p>除非您 <em>确实</em> 需要，否则请禁用cookie。在进行广泛的爬行时，通常不需要cookie（搜索引擎爬虫会忽略它们）；禁用它们可以节省一些CPU周期并减少Scrapy爬虫的内存占用，从而提高性能。</p>
<p>要禁用cookie，请使用：</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">COOKIES_ENABLED</span> <span class="o">=</span> <span class="kc">False</span>
</pre></div>
</div>
</div>
<div class="section" id="disable-retries">
<h2>禁用重试<a class="headerlink" href="#disable-retries" title="永久链接至标题">¶</a></h2>
<p>重试失败的HTTP请求会大大降低爬行速度，特别是当站点响应非常慢（或无法响应）时：这会导致超时错误，而该错误会被多次不必要地重试，从而使爬虫的容量无法被用于其他域。</p>
<p>要禁用重试，请使用：</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">RETRY_ENABLED</span> <span class="o">=</span> <span class="kc">False</span>
</pre></div>
</div>
</div>
<div class="section" id="reduce-download-timeout">
<h2>减少下载超时<a class="headerlink" href="#reduce-download-timeout" title="永久链接至标题">¶</a></h2>
<p>除非您是从一个非常慢的连接爬行（这不应该是广泛爬行的情况），否则请减少下载超时，以便快速丢弃卡住的请求并释放处理下一个请求的容量。</p>
<p>要减少下载超时，请使用：</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">DOWNLOAD_TIMEOUT</span> <span class="o">=</span> <span class="mi">15</span>
</pre></div>
</div>
</div>
<div class="section" id="disable-redirects">
<h2>禁用重定向<a class="headerlink" href="#disable-redirects" title="永久链接至标题">¶</a></h2>
<p>考虑禁用重定向，除非您有兴趣跟踪它们。在进行广泛的爬行时，保存重定向并在以后的爬行中重新访问站点时解决重定向是很常见的。这也有助于保持每个爬网批处理的请求数不变，否则重定向循环可能会导致爬网程序在任何特定域上投入过多的资源。</p>
<p>要禁用重定向，请使用：</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">REDIRECT_ENABLED</span> <span class="o">=</span> <span class="kc">False</span>
</pre></div>
</div>
</div>
<div class="section" id="enable-crawling-of-ajax-crawlable-pages">
<h2>启用“Ajax可爬行页”的爬行<a class="headerlink" href="#enable-crawling-of-ajax-crawlable-pages" title="永久链接至标题">¶</a></h2>
<p>一些页面（根据2013年的经验数据，高达1%）宣称自己是 <a class="reference external" href="https://developers.google.com/search/docs/ajax-crawling/docs/getting-started">ajax crawlable</a> . 这意味着它们提供了内容的纯HTML版本，通常只能通过Ajax提供。页面可以用两种方式表示：</p>
<ol class="arabic simple">
<li><p>通过使用 <code class="docutils literal notranslate"><span class="pre">#!</span></code> 在url中-这是默认方式；</p></li>
<li><p>通过使用一个特殊的元标签-这种方式在“主”、“索引”网站页面上使用。</p></li>
</ol>
<p>Scrapy会自动处理（1）；要处理（2），请启用 <a class="reference internal" href="downloader-middleware.html#ajaxcrawl-middleware"><span class="std std-ref">AjaxCrawlMiddleware</span></a> ：</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">AJAXCRAWL_ENABLED</span> <span class="o">=</span> <span class="kc">True</span>
</pre></div>
</div>
<p>在进行广泛的爬行时，通常会对许多“索引”网页进行爬行；AjaxCrawl中间件有助于正确地对它们进行爬行。它在默认情况下是关闭的，因为它有一些性能开销，启用它进行聚焦爬行没有什么意义。</p>
</div>
<div class="section" id="crawl-in-bfo-order">
<span id="broad-crawls-bfo"></span><h2>按BFO顺序爬行<a class="headerlink" href="#crawl-in-bfo-order" title="永久链接至标题">¶</a></h2>
<p><a class="reference internal" href="../faq.html#faq-bfo-dfo"><span class="std std-ref">Scrapy默认按DFO顺序爬行</span></a> 。</p>
<p>然而，在广泛的爬行中，页面爬行往往比页面处理更快。因此，未处理的早期请求将保留在内存中，直到达到最终深度，这可以显著增加内存使用量。</p>
<p>改为 <a class="reference internal" href="../faq.html#faq-bfo-dfo"><span class="std std-ref">按BFO顺序爬行</span></a> 可以节省内存。</p>
</div>
<div class="section" id="be-mindful-of-memory-leaks">
<h2>注意内存泄漏<a class="headerlink" href="#be-mindful-of-memory-leaks" title="永久链接至标题">¶</a></h2>
<p>如果您的广泛爬行显示内存使用率很高，除了 <a class="reference internal" href="#broad-crawls-bfo"><span class="std std-ref">按BFO顺序爬行</span></a> 和 <a class="reference internal" href="#broad-crawls-concurrency"><span class="std std-ref">降低并发性</span></a> 之外，您还应该 <a class="reference internal" href="leaks.html#topics-leaks"><span class="std std-ref">调试内存泄漏</span></a> 。</p>
</div>
<div class="section" id="install-a-specific-twisted-reactor">
<h2>安装特定的Twisted反应器<a class="headerlink" href="#install-a-specific-twisted-reactor" title="永久链接至标题">¶</a></h2>
<p>如果爬行超出了系统的能力，您可能需要尝试通过 <a class="reference internal" href="settings.html#std-setting-TWISTED_REACTOR"><code class="xref std std-setting docutils literal notranslate"><span class="pre">TWISTED_REACTOR</span></code></a> 设置安装特定的Twisted反应器。</p>
</div>
</div>


           </div>
           
          </div>
          <footer>
  
    <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
      
        <a href="developer-tools.html" class="btn btn-neutral float-right" title="使用浏览器的开发人员工具进行抓取" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
      
      
        <a href="practices.html" class="btn btn-neutral float-left" title="常用做法" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
      
    </div>
  

  <hr/>

  <div role="contentinfo">
    <p>
        
        &copy; 版权所有 2008–2020, Scrapy developers
      <span class="lastupdated">
        最后更新于 10月 18, 2020.
      </span>

    </p>
  </div>
    
    
    
    Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a
    
    <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a>
    
    provided by <a href="https://readthedocs.org">Read the Docs</a>. 

</footer>

        </div>
      </div>

    </section>

  </div>
  

  <script>
      // Registered with jQuery so it runs when the document is ready; it then
      // switches on the theme's navigation behaviour.
      // NOTE(review): the `true` argument presumably selects sticky navigation - confirm against theme.js.
      jQuery(function initThemeNavigation() {
          SphinxRtdTheme.Navigation.enable(true);
      });
  </script>

  
  
    
  
 
<script type="text/javascript">
// Segment analytics.js loader snippet (SNIPPET_VERSION "3.1.0"), kept minified as shipped.
// It creates window.analytics as an array-backed queue, unless the object already has
// `initialize` (nothing is done) or `invoked` (a "Segment snippet included twice." error
// is logged). Every name listed in analytics.methods becomes a stub that pushes
// [methodName, ...args] onto the queue and returns the queue object.
// analytics.load(key) inserts an async <script> for
// cdn.segment.com/analytics.js/v1/<key>/analytics.min.js before the first <script> in the
// document, using https only when the page itself is served over https.
// The last two statements load the library for this site's write key and queue a page call.
!function(){var analytics=window.analytics=window.analytics||[];if(!analytics.initialize)if(analytics.invoked)window.console&&console.error&&console.error("Segment snippet included twice.");else{analytics.invoked=!0;analytics.methods=["trackSubmit","trackClick","trackLink","trackForm","pageview","identify","reset","group","track","ready","alias","page","once","off","on"];analytics.factory=function(t){return function(){var e=Array.prototype.slice.call(arguments);e.unshift(t);analytics.push(e);return analytics}};for(var t=0;t<analytics.methods.length;t++){var e=analytics.methods[t];analytics[e]=analytics.factory(e)}analytics.load=function(t){var e=document.createElement("script");e.type="text/javascript";e.async=!0;e.src=("https:"===document.location.protocol?"https://":"http://")+"cdn.segment.com/analytics.js/v1/"+t+"/analytics.min.js";var n=document.getElementsByTagName("script")[0];n.parentNode.insertBefore(e,n)};analytics.SNIPPET_VERSION="3.1.0";
analytics.load("8UDQfnf3cyFSTsM4YANnW5sXmgZVILbA");
analytics.page();
}}();

// Callback registered through analytics.ready: sets up the Google Analytics
// "linker" plugin with auto-linking for the two listed domains.
// `ga` is not defined anywhere in this page, so it is presumably provided by a
// Google Analytics integration loaded through analytics.js. If that global is
// missing (integration disabled, script blocked), the callback returns early
// instead of throwing a ReferenceError.
analytics.ready(function () {
    if (typeof ga !== 'function') {
        return;
    }
    ga('require', 'linker');
    ga('linker:autoLink', ['scrapinghub.com', 'crawlera.com']);
});
</script>


</body>
</html>